from pyspark import SparkContext

# Local-mode Spark context for this example script.
# NOTE(review): the app name 'word_count' looks carried over from another
# example; it is kept as-is because it is a runtime string.
sc = SparkContext(master='local', appName='word_count')

# Select the student records whose id (the first comma-separated field) is in
# a small lookup collection shared with the tasks via a broadcast variable.

# One RDD element per line of the file; suggest at least 100 partitions.
student_rdd = sc.textFile("../../data/students.txt", minPartitions=100)

# Ids to keep. A set makes each per-record membership test O(1).
ids = {"1500100065", "1500100069", "1500100094", "1500100110"}


# Broadcast the ids so they are cached on each executor instead of being
# shipped along with every task.
ids_bro = sc.broadcast(ids)

def filter_func(stu: str) -> bool:
    """Return True when a student record's id is in the broadcast ids.

    Args:
        stu: One record as a comma-separated string. The id is the text
            before the first comma (the whole string if there is no comma).

    Returns:
        Whether that id is contained in ``ids_bro.value``.
    """
    # Only the first field is needed, so stop at the first comma rather than
    # splitting the whole record. The local name avoids shadowing the builtin
    # ``id`` and the module-level ``ids``.
    student_id = stu.partition(',')[0]

    return student_id in ids_bro.value

# Transformation (lazy): keep only the records for which filter_func is True.
filter_rdd = student_rdd.filter(filter_func)


# Action: triggers the job and calls print on every remaining record.
# print runs inside the tasks; with the 'local' master used here the output
# appears in this process's console.
# NOTE(review): on a cluster the output would presumably go to the executors'
# stdout rather than the driver — confirm before reusing this pattern.
filter_rdd.foreach(print)